vmp_snomed_codes in a subsequent query# Define SQL query for Secondary Care Medicines Data (SCMD) data
# Select variables and calculate quantity of medicines grouped by
# year_month, ods_code, vmp_snomed_code, vmp_snomed_name
sql_query_scmd <- dbplyr::sql("SELECT
year_month,
ods_code,
vmp_snomed_code,
vmp_product_name,
SUM(total_quanity_in_vmp_unit) AS total_quantity
FROM ebmdatalab.scmd.scmd
GROUP BY
year_month,
ods_code,
vmp_snomed_code,
vmp_product_name")
# BIG database stuff read from csv instead
# I'm not adding this to github, so needs to be run once to set up
# db_scmd <- dplyr::tbl(conn_ebm_scmd, sql_query_scmd)
# Collect data here
# df_scmd <- db_scmd %>%
# collect()
# Write csv do disk
# write_csv(df_scmd, here("data/scmd.csv"))
# Once the commented code above is run, we can use the local csv file
df_scmd <- read_csv(here("data/scmd.csv"),
col_types = cols(vmp_snomed_code = col_character()))
# create lookup table to join vmp names to codes
df_scmd_lookup <- df_scmd %>%
select(vmp_snomed_code, vmp_product_name) %>%
distinct()
# get date range for tables or in text use
scmd_date_range <- range(df_scmd$year_month)
n_ods_codes <- length(unique(df_scmd$ods_code))
n_snomed_codes <- length(unique(df_scmd$vmp_snomed_code))
sum_quantity_positive <- sum(df_scmd$total_quantity[which(df_scmd$total_quantity > 0)])
sum_quantity_negative <- sum(df_scmd$total_quantity[which(df_scmd$total_quantity < 0)])
# calculate total count for each product
df_scmd_distinct <- df_scmd %>%
group_by(vmp_snomed_code) %>%
summarise(count = sum(total_quantity, na.rm = TRUE)) %>%
ungroup() %>%
arrange(desc(count)) %>%
left_join(df_scmd_lookup)
The date range in this notebooks is from 2019-01-01 to 2020-11-01
The SCMD dataset has n = 220 unique ODS codes and n = 10870 unique SNOMED codes
The sum of all positive quantities is: 38,562,782,582
The sum of all negative quantities is: -143,874,967
The table below shows the first 100 entries of the SCMD dataset (arranged by vmp_snomed_code) from BigQuery
reactable(head(df_scmd, 100),
style = list(fontSize = "12px"),
highlight = TRUE,
filterable = TRUE)
temp_ggplot <- df_scmd %>%
select(year_month, ods_code) %>%
distinct() %>%
group_by(year_month) %>%
count() %>%
ggplot(aes(x = year_month,
y = n)) +
geom_line(size = 1, alpha = 0.5) +
geom_point() +
scale_colour_viridis_d() +
scale_x_date(date_breaks = "4 month",
date_labels = "%b %y") +
labs(x = NULL, y = "Number of Hospitals",
colour = NULL) +
# geom_vline(xintercept = as.numeric(as.Date("2020-03-31")),
# color = "orange",
# linetype = 2,
# lwd = .5,
# alpha = .5) +
theme(text = element_text(size = 12))
# temp_ggplot
plotly::ggplotly(temp_ggplot,
tooltip = "text") %>%
plotly::config(displayModeBar = FALSE)
Figure. Number of hospitals contributing monthly data.
db_dmd_info <- dplyr::tbl(conn_ebm_scmd, sql_query_dmd_info)
# Define SQL query for Dictionary of Medicines and Devices (dm+d) information
# Rename some variables to match names across different queries (e.g., vmp_snomed_code)
sql_query_dmd_info <- dbplyr::sql("SELECT
CAST(a.id AS STRING) AS vmp_snomed_code,
a.nm AS vmp_product_name,
a.vtm AS vtmid,
j.nm AS vtmnm,
b.form AS form_cd,
c.descr AS form_descr,
a.df_ind AS df_ind_cd,
d.descr AS df_descr,
a.udfs,
e.descr AS udfs_descr,
f.descr AS unit_dose_descr,
g.strnt_nmrtr_val,
h.descr AS strnt_nmrtr_uom,
g.strnt_dnmtr_val,
i.descr AS strnt_dnmtr_descr,
a.bnf_code,
k.presentation AS bnf_presentation
FROM ebmdatalab.dmd.vmp AS a
LEFT JOIN ebmdatalab.dmd.dform AS b
ON a.id = b.vmp
LEFT JOIN ebmdatalab.dmd.form AS c
ON b.form = c.cd
LEFT JOIN ebmdatalab.dmd.dfindicator AS d
ON a.df_ind = d.cd
LEFT JOIN ebmdatalab.dmd.unitofmeasure AS e
ON a.udfs_uom = e.cd
LEFT JOIN ebmdatalab.dmd.unitofmeasure AS f
ON a.unit_dose_uom = f.cd
LEFT JOIN ebmdatalab.dmd.vpi AS g
ON a.id = g.vmp
LEFT JOIN ebmdatalab.dmd.unitofmeasure AS h
ON g.strnt_nmrtr_uom = h.cd
LEFT JOIN ebmdatalab.dmd.unitofmeasure AS i
ON g.strnt_dnmtr_uom = i.cd
LEFT JOIN ebmdatalab.dmd.vtm AS j
ON a.vtm = j.id
LEFT JOIN ebmdatalab.hscic.bnf AS k
ON a.bnf_code = k.presentation_code")
db_dmd_info_distinct <- db_dmd_info %>%
select(vmp_snomed_code, udfs, vtmid, form_cd, strnt_dnmtr_val, strnt_nmrtr_val, strnt_dnmtr_val, strnt_dnmtr_descr, bnf_code) %>%
distinct()
df_dmd_info_distinct <- db_dmd_info_distinct %>%
collect()
df_dmd_info_nmrt <- df_dmd_info_distinct %>%
drop_na(strnt_nmrtr_val) %>%
left_join(df_scmd_lookup) %>%
dplyr::relocate(vmp_product_name, vmp_snomed_code) %>%
arrange(vmp_snomed_code) %>%
mutate(vmp_product_name = fct_explicit_na(vmp_product_name))
reactable(df_dmd_info_nmrt,
columns = list(
udfs = reactable::colDef(show = FALSE),
vtmid = reactable::colDef(show = FALSE),
form_cd = reactable::colDef(show = FALSE),
bnf_code = reactable::colDef(show = FALSE)
),
style = list(fontSize = "12px"),
highlight = TRUE,
filterable = TRUE)
# get ddd data and make var names consistent
df_ddd <- read_csv(here("data/ddd_week492021.csv"),
col_types = cols(VPID = col_character())) %>%
janitor::clean_names() %>%
rename(vmp_snomed_code = vpid)
reactable(df_ddd,
style = list(fontSize = "12px"),
highlight = TRUE,
filterable = TRUE)
TRUE = “ddd info available”; FALSE = “product listed in dm+d dataset but no ddd info not available”; (Missing) = “product NOT listed in dm+d dataset, therefore no ddd info not available”)df_ddd_avail <- df_ddd %>%
mutate(ddd_avail = factor(!is.na(ddd)),
bnf_avail = factor(!is.na(bnf))) %>%
select(vmp_snomed_code, ddd_avail)
df_scmd_tab <- df_scmd_distinct %>%
left_join(df_ddd_avail) %>%
mutate(ddd_avail = fct_explicit_na(ddd_avail)) %>%
select(vmp_product_name, vmp_snomed_code, count, ddd_avail) %>%
mutate(positive_count = factor(count >= 0, levels = c(F, T),
labels = c("Negative", "Positive")))
df_scmd_tab_shared <- SharedData$new(df_scmd_tab)
bscols(widths = c(3, 9),
list(
filter_checkbox("positive_count", "Count",
df_scmd_tab_shared, ~positive_count,
inline = TRUE),
filter_select("ddd_avail", "Filter ddd availability",
df_scmd_tab_shared, ~ddd_avail)),
reactable(df_scmd_tab_shared,
columns = list(
vmp_product_name = colDef(name = "Name",
minWidth = 100),
vmp_snomed_code = colDef(name = "Code",
minWidth = 50),
count = colDef(name = "Total count",
minWidth = 30,
format = colFormat(digits = 0)),
ddd_avail = colDef(name = "ddd available",
minWidth = 30),
positive_count = reactable::colDef(show = FALSE)
),
columnGroups = list(
colGroup(name = "SNOMED",
columns = c("vmp_product_name", "vmp_snomed_code"))
),
style = list(fontSize = "12px"),
highlight = TRUE,
filterable = TRUE,
defaultSorted = list(count = "desc")))